Loading packages:
library(Seurat)
library(tidyverse)
library(gprofiler2)
library(sleepwalk)
library(SCINA)
library(gridExtra)
theme_set(theme_bw(base_size = 14))#Loading data
setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024/Processed data to be loaded in R")
data_dir_1 <- '/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024/Processed data to be loaded in R/de_novo_AML1'
data_dir_2 <- '/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024/Processed data to be loaded in R/de_novo_AML2'
data_dir_3 <- '/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024/Processed data to be loaded in R/de_novo_AML3'
AML_1 <- Read10X(
data_dir_1,
gene.column = 2,
cell.column = 1,
unique.features = TRUE,
strip.suffix = FALSE
)
AML_2 <- Read10X(
data_dir_2,
gene.column = 2,
cell.column = 1,
unique.features = TRUE,
strip.suffix = FALSE
)
AML_3 <- Read10X(
data_dir_3,
gene.column = 2,
cell.column = 1,
unique.features = TRUE,
strip.suffix = FALSE
)seurat_object_AML_1 = CreateSeuratObject(AML_1)
seurat_object_AML_2 = CreateSeuratObject(AML_2)
seurat_object_AML_3 = CreateSeuratObject(AML_3)seurat_object_AML_1$orig.ident = 'AML_1'
seurat_object_AML_2$orig.ident = 'AML_2'
seurat_object_AML_3$orig.ident = 'AML_3'
denovo_AML_data <- merge(seurat_object_AML_1, y = c(seurat_object_AML_2, seurat_object_AML_3), project = "AML_merged")## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
as_tibble(denovo_AML_data@meta.data) %>%
group_by(orig.ident) %>%
summarise(initial_cells=n()) -> cell_counts
cell_countsPercentageFeatureSet(denovo_AML_data,pattern="^MT-") -> denovo_AML_data$percent_mt
PercentageFeatureSet(denovo_AML_data,pattern="MALAT1") -> denovo_AML_data$percent_Malat
apply(denovo_AML_data@assays$RNA@counts, 2, function(x) max((100*x)/sum(x))) -> denovo_AML_data$percent_Largest
denovo_AML_data[[]]as_tibble(denovo_AML_data@meta.data) -> qc_metrics
head(qc_metrics)#QC
qc_metrics %>%
pivot_longer(cols=-orig.ident, names_to="metric",values_to="value") %>%
ggplot(aes(x=orig.ident, y=value)) +
geom_violin(fill="lightskyblue")+
facet_grid(rows=vars(metric), scales="free_y")ggsave(paste0("Violin plot linear scale no intercept "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 10, dpi = 300)
qc_metrics %>%
pivot_longer(cols=-orig.ident, names_to="metric",values_to="value") %>%
ggplot(aes(x=orig.ident, y=log(value))) +
geom_violin(fill="lightskyblue")+
facet_grid(rows=vars(metric), scales="free_y")ggsave(paste0("Violin plot log scale no intercept "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 10, dpi = 300)qc_metrics %>%
ggplot(aes(x=orig.ident, y=log(nFeature_RNA))) +
geom_violin(fill="lightskyblue")+
geom_hline(yintercept = log(1500))+
geom_hline(yintercept = log(8800))ggsave(paste0("Violin plot log scale yintercept 1000 to 8800 feature "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)
qc_metrics %>%
ggplot(aes(x=orig.ident, y=log(percent_mt))) +
geom_violin(fill="lightskyblue")+
geom_hline(yintercept = log(20))## Warning: Removed 37 rows containing non-finite values (`stat_ydensity()`).
ggsave(paste0("Violin plot log scale yintercept 15 percent_mt "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)## Warning: Removed 37 rows containing non-finite values (`stat_ydensity()`).
qc_metrics %>%
ggplot(aes(x=orig.ident, y=log(percent_Largest))) +
geom_violin(fill="lightskyblue")+
geom_hline(yintercept = log(15))ggsave(paste0("Violin plot log scale yintercept 20 percent_largest_gene "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)
qc_metrics %>%
ggplot(aes(x=orig.ident, y=log(percent_Malat))) +
geom_violin(fill="lightskyblue")+
geom_hline(yintercept = log(20))## Warning: Removed 179 rows containing non-finite values (`stat_ydensity()`).
ggsave(paste0("Violin plot log scale yintercept 20 percent_Malat "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)## Warning: Removed 179 rows containing non-finite values (`stat_ydensity()`).
subset(
denovo_AML_data,
nFeature_RNA > 1500 &
nFeature_RNA < 8800 &
percent_mt < 20 &
percent_Malat < 20
) -> denovo_AML_data
denovo_AML_data## An object of class Seurat
## 36601 features across 12599 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
as_tibble(denovo_AML_data@meta.data) -> qc_metrics_filtered
qc_metrics_filtered %>%
pivot_longer(cols=-orig.ident, names_to="metric",values_to="value") %>%
ggplot(aes(x=orig.ident, y=value)) +
geom_violin(fill="lightskyblue")+
facet_grid(rows=vars(metric), scales="free_y")ggsave(paste0("Violin plot post filtering linear scale no intercept "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 10, dpi = 300)
qc_metrics_filtered %>%
pivot_longer(cols=-orig.ident, names_to="metric",values_to="value") %>%
ggplot(aes(x=orig.ident, y=log(value))) +
geom_violin(fill="lightskyblue")+
facet_grid(rows=vars(metric), scales="free_y")ggsave(paste0("Violin plot post filtering log scale no intercept "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 10, dpi = 300)setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
saveRDS(denovo_AML_data, file="denovo_AML_data_filtered_latest.RDS")
saveRDS(qc_metrics_filtered, file="denovoAML_qc_metrics_filtered_latest.RDS")#Normalisation
setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
#denovo_AML_data <- readRDS("denovo_AML_data_filtered_latest.RDS")
#qc_metrics_filtered <- readRDS("denovoAML_qc_metrics_filtered_latest.RDS")
NormalizeData(denovo_AML_data, normalization.method = "CLR") -> denovo_AML_data
saveRDS(denovo_AML_data, file="denovo_AML_data_norm_filtered_latest.RDS")Then we can check how well the normalisation actually works
setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
#denovo_AML_data <- readRDS("denovo_AML_data_norm_filtered_latest.RDS")
#qc_metrics_filtered <- readRDS("denovoAML_qc_metrics_filtered_latest.RDS")
subset <- denovo_AML_data@assays$RNA@data[,c(rep(F,69),T)] %>%
as_tibble() %>%
pivot_longer(
cols=everything(),
names_to="cell",
values_to="value"
) %>%
left_join(
as_tibble(denovo_AML_data@meta.data, rownames="cell") %>%
dplyr::select(cell,orig.ident)
)
subset %>%
ggplot(aes(x=value, colour=orig.ident, group=cell)) +
geom_density(linewidth=0.25)+
coord_cartesian(xlim=c(0,3), ylim=c(0,1)) +
scale_colour_brewer(palette = "Set1")ggsave(paste0("Post norm check all samples together "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)
subset %>%
ggplot(aes(x=value, colour=orig.ident, group=cell)) +
geom_density(linewidth=0.25)+
coord_cartesian(xlim=c(0,3), ylim=c(0,1)) +
scale_colour_brewer(palette = "Set1") +
facet_wrap(vars(orig.ident))ggsave(paste0("Post norm check 4 samples separate "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
#denovo_AML_data <- readRDS("denovo_AML_data_norm_filtered_latest.RDS")
#qc_metrics_filtered <- readRDS("denovoAML_qc_metrics_filtered_latest.RDS")FindVariableFeatures(
denovo_AML_data,
selection.method = "vst",
nfeatures = 500
) -> denovo_AML_data
as_tibble(HVFInfo(denovo_AML_data),rownames = "Gene") -> variance.data
variance.data %>%
mutate(hypervariable=Gene %in% VariableFeatures(denovo_AML_data)
) -> variance.data
variance.data %>%
arrange(desc(variance.standardized)) -> variance.data
head(variance.data,n=100)setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
variance.data %>%
arrange(hypervariable) %>%
ggplot(aes(x=mean,y=variance, colour=hypervariable)) +
geom_point() +
scale_y_log10() +
scale_x_log10() +
scale_colour_manual(values=c("grey","red2"))## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
ggsave(paste0("HVG gene plot "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous x-axis
#Extract gene list for GO analysis
#https://www.bioinformatics.babraham.ac.uk/goliath/goliath.cgi
variance.data %>%
arrange(desc(variance.standardized)) %>%
slice(1:100) %>%
pull(Gene) %>%
sapply(function(x)cat(paste0(x,"\n"))) -> temp## JCHAIN
## IGKC
## IGHA1
## IGHG1
## GNLY
## IGLC2
## MZB1
## PRTN3
## CTSG
## TPSB2
## ELANE
## MPO
## LSAMP
## C1QB
## PTGDS
## TXNDC5
## IL32
## IGHA2
## IFI27
## IGHV3-30
## AZU1
## IGLL1
## IGHG2
## IGLC3
## FCER1A
## C1QC
## IGHV3-33
## NRXN3
## TPSD1
## FN1
## IGLV1-40
## IGHGP
## LTBP1
## BGLAP
## CCL4
## IGKV3-20
## IGLV2-14
## GZMB
## AC092691.1
## PRF1
## IGHG3
## IGLV6-57
## ACSM3
## NKG7
## IGKV1-5
## C1QA
## S100B
## AREG
## IL1R2
## TYMS
## G0S2
## IGHV1-18
## IGHV3-7
## IGLC1
## CD1C
## RRM2
## AQP3
## IGHV3-23
## CCL5
## THBS1
## GZMA
## CST7
## MT1G
## TRBV7-9
## CD27
## GZMH
## TRBV4-2
## MS4A1
## IGHG4
## IGHV4-39
## S100A8
## IGFBP2
## S100A9
## UBE2C
## ZNF385D
## CCDC26
## MS4A3
## AC109466.1
## GZMK
## TRBV9
## CCL4L2
## EREG
## AC233755.1
## IGKV4-1
## DPP10
## DERL3
## TRBV5-1
## MT1H
## HLA-DQA1
## LINC01478
## IGHV3-64D
## LINC02147
## IGHV4-31
## IGHV6-1
## SELENOP
## TNFRSF17
## IGHV3-48
## RHOXF1-AS1
## IGHV4-59
## IGHV3-66
Save variable gene and generated gene list
setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
saveRDS(variance.data, file="variable_genes_latest.RDS")
saveRDS(temp, file="variable_genes_list_for_GO_latest.RDS")A bit of data exploration:
qc_metrics_filtered %>%
pivot_longer(cols=-orig.ident, names_to="metric",values_to="value") %>%
ggplot(aes(x=orig.ident, y=value)) +
geom_violin(fill="lightskyblue")+
facet_grid(rows=vars(metric), scales="free_y")setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
ScaleData(denovo_AML_data, features=rownames(data)) -> denovo_AML_data## Centering and scaling data matrix
RunPCA(
denovo_AML_data,
features=VariableFeatures(denovo_AML_data)
) -> denovo_AML_data## PC_ 1
## Positive: IFITM1, LY6E, ISG15, IFI6, MDK, AL589693.1, IGHM, TRH, BAALC, INPP4B
## ISG20, CD69, TOX, SOX4, NPW, KCNQ5, H1F0, PRSS57, ANGPT1, APP
## CD34, DNTT, IFITM3, CASC15, HLA-DQA1, STMN1, C1QTNF4, CALCRL, HLA-DPB1, HLA-DPA1
## Negative: LYZ, VCAN, S100A9, CFD, S100A8, IFI30, CST3, SERPINA1, MS4A6A, PSAP
## AL163541.1, CD14, FCN1, SLC8A1, CD36, LMNA, CTSD, CD163, NCF1, NEAT1
## MAFB, AC020656.1, ACSL1, CPVL, CRIP1, ARHGAP24, HMOX1, C1QA, FNDC3B, F13A1
## PC_ 2
## Positive: TYMS, PCLAF, AZU1, CTSG, ELANE, IGFBP2, CENPM, UHRF1, RRM2, PLPPR3
## CLSPN, MCM7, TK1, ATP8B4, PCNA, MIR924HG, MKI67, H2AFZ, MYBL2, DIAPH3
## HIST1H4C, STMN1, MCM4, MS4A3, ST8SIA6, PRTN3, HIST1H3B, MPO, TUBB, MEIS1
## Negative: LILRA4, DRD4, LINC01478, SPIB, HLA-DPA1, NIBAN3, LINC01374, UGCG, ZFP36L1, HLA-DQA1
## PLD4, HLA-DPB1, HLA-DRA, EPHB1, CLIC3, VSIG4, PTPRS, CLEC10A, TSPAN13, FCN1
## ZFAT, CD300E, C1QA, CD14, NAPSA, HLA-DQB1, HLA-DRB1, FCGR3A, C1QB, ISG20
## PC_ 3
## Positive: IL32, PRF1, CST7, GZMA, GZMH, CD3E, GZMB, CD247, GZMM, LCK
## SKAP1, KLRD1, SYNE2, FGFBP2, CD3D, GNLY, CCL5, NKG7, CD8A, TRBC1
## TGFBR3, CD3G, TRBC2, BCL11B, ARL4C, THEMIS, CD8B, CCL4, CAMK4, KLRC2
## Negative: PRSS57, MDK, ATP8B4, TRH, ANGPT1, AL589693.1, BAALC, NPW, CD34, KCNQ5
## CASC15, CALCRL, HLA-DRA, SOX4, HSPB1, INPP4B, SLC24A3, TPSAB1, DNTT, H1F0
## C1QTNF4, HLA-DRB1, LINC02147, IFITM3, HLA-DPB1, SPINK2, MAMDC2, IGHM, HLA-DQB1, TOX
## PC_ 4
## Positive: LILRA4, LINC01478, UGCG, DRD4, NIBAN3, PLD4, PTPRS, ZFAT, SPIB, EPHB1
## LINC01374, DERL3, JCHAIN, MZB1, TCF4, CLIC3, IRF4, TSPAN13, IGKC, NAPSA
## TXNDC5, RUNX2, APP, HSP90B1, AFF3, HDAC9, RASD1, MYBL2, SEC11C, HSPA5
## Negative: TRH, MDK, BAALC, AL589693.1, INPP4B, ANGPT1, CD34, CD69, NPW, DNTT
## CCL5, TOX, CALCRL, IL32, MT2A, CASC15, C1QTNF4, IFITM3, IFI6, PRF1
## GZMA, NEAT1, GZMH, PRSS57, CST7, GZMM, ZNF385D, CD3E, HSPB1, ZFP36L1
## PC_ 5
## Positive: HLA-DRB1, HLA-DRA, HLA-DPA1, HLA-DPB1, HLA-DQB1, TUBA1B, HLA-DQA1, H2AFZ, TUBB, IFITM3
## CST3, IFI30, CLEC10A, TYMS, IFI6, HSP90B1, TK1, ISG15, CRIP1, C1QA
## LY6E, HDAC9, PCLAF, FCER1A, PCNA, PSAP, PKIB, MT2A, NAPSA, RRM2
## Negative: ZNF521, LTBP1, ACSM3, SELENOP, CPA3, PLPPR3, DPP10, IGFBP2, LSAMP, IGLL1
## NCAM1, MPO, AC092691.1, MSI2, CNTN1, NRXN3, CAV2, SCN9A, MS4A3, AC244502.1
## CTSG, ELANE, PRTN3, TPSB2, AC109466.1, LINC02147, PTGER3, AZU1, EREG, TPSD1
PCAPlot(denovo_AML_data, dims=c(1,2), group.by= 'orig.ident')ggsave(paste0("PCA dim 1&2 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 6, dpi = 300)
PCAPlot(denovo_AML_data, dims=c(3,4), group.by= 'orig.ident')ggsave(paste0("PCA dim 3&4 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 6, dpi = 300)
PCAPlot(denovo_AML_data, dims=c(5,6), group.by= 'orig.ident')ggsave(paste0("PCA dim 5&6 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 6, dpi = 300)
PCAPlot(denovo_AML_data, dims=c(7,8), group.by= 'orig.ident')ggsave(paste0("PCA dim 7&8 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 6, dpi = 300)
PCAPlot(denovo_AML_data, dims=c(9,10), group.by= 'orig.ident')ggsave(paste0("PCA dim 9&10 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 6, dpi = 300)
ElbowPlot(denovo_AML_data, ndims=40)ggsave(paste0("PCA SD vs dim "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 5, dpi = 300)Idents(denovo_AML_data) <- 'orig.ident'setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
saved.seed <- 5540
set.seed(saved.seed)
RunUMAP(
denovo_AML_data,
dims=1:30,
n.neighbors = 50,
min.dist = 1,
seed.use = saved.seed
) -> denovo_AML_data## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:54:36 UMAP embedding parameters a = 0.115 b = 1.929
## 11:54:36 Read 12599 rows and found 30 numeric columns
## 11:54:36 Using Annoy for neighbor search, n_neighbors = 50
## 11:54:36 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:54:38 Writing NN index file to temp file /var/folders/rx/vvf3pzf55v912fz97g52nz3m0000gn/T//RtmpmzbxDj/file84b5119e1f81
## 11:54:38 Searching Annoy index using 1 thread, search_k = 5000
## 11:54:45 Annoy recall = 100%
## 11:54:46 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 50
## 11:54:48 Initializing from normalized Laplacian + noise (using irlba)
## 11:54:49 Commencing optimization for 200 epochs, with 865768 positive edges
## 11:55:03 Optimization finished
DimPlot(denovo_AML_data,reduction = "umap", pt.size = 0.3)ggsave(paste0("UMAP all samples "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 8, dpi = 300)
DimPlot(denovo_AML_data,reduction = "umap", pt.size = 0.3, split.by = "orig.ident", ncol = 2)ggsave(paste0("UMAP 3 samples separate "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 12, height = 10, dpi = 300)As AML3 seems completely off, we are removing it from the dataset for further analysis
(Below is trying to do batch correction but harmony somehow cannot be downloaded)
denovo_AML_data_no3 <- subset(denovo_AML_data, subset = orig.ident != "AML_3")setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
DimPlot(denovo_AML_data_no3,reduction = "umap", pt.size = 0.3)ggsave(paste0("UMAP 2 samples "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 8, dpi = 300)
DimPlot(denovo_AML_data_no3,reduction = "umap", pt.size = 0.3, split.by = "orig.ident", ncol = 2)ggsave(paste0("UMAP 2 samples separate "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 12, height = 10, dpi = 300)FindNeighbors(denovo_AML_data_no3, reduction = "pca", dims = 1:30) -> denovo_AML_data_no3## Computing nearest neighbor graph
## Computing SNN
denovo_AML_data_no3@graphs$RNA_snn[1:10,1:10]## 10 x 10 sparse Matrix of class "dgCMatrix"
## [[ suppressing 10 column names 'AAACGGGCACCAGTTA-1_1', 'AAACGGGGTACCGGCT-1_1', 'AAAGATGAGCCGTCGT-1_1' ... ]]
##
## AAACGGGCACCAGTTA-1_1 1 . . . . . . . . .
## AAACGGGGTACCGGCT-1_1 . 1 . . . . . . . .
## AAAGATGAGCCGTCGT-1_1 . . 1 . . . . . . .
## AAAGATGCACTTCGAA-1_1 . . . 1 . . . . . .
## AAAGATGTCCCAAGAT-1_1 . . . . 1 . . . . .
## AAAGCAACACAGACAG-1_1 . . . . . 1 . . . .
## AAAGCAACATAAAGGT-1_1 . . . . . . 1 . . .
## AAAGCAACATCCTTGC-1_1 . . . . . . . 1.00000000 . 0.08108108
## AAAGCAATCTACCTGC-1_1 . . . . . . . . 1 .
## AAAGTAGAGGATGCGT-1_1 . . . . . . . 0.08108108 . 1.00000000
FindClusters(denovo_AML_data_no3, resolution = 0.1) -> denovo_AML_data_no3## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6597
## Number of edges: 230574
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9291
## Number of communities: 6
## Elapsed time: 1 seconds
head(denovo_AML_data_no3$seurat_clusters, n=50)## AAACGGGCACCAGTTA-1_1 AAACGGGGTACCGGCT-1_1 AAAGATGAGCCGTCGT-1_1
## 0 0 1
## AAAGATGCACTTCGAA-1_1 AAAGATGTCCCAAGAT-1_1 AAAGCAACACAGACAG-1_1
## 0 0 2
## AAAGCAACATAAAGGT-1_1 AAAGCAACATCCTTGC-1_1 AAAGCAATCTACCTGC-1_1
## 0 0 2
## AAAGTAGAGGATGCGT-1_1 AAAGTAGTCCAAATGC-1_1 AAAGTAGTCGTAGGAG-1_1
## 0 0 1
## AAATGCCAGGACCACA-1_1 AAATGCCAGTTAAGTG-1_1 AAATGCCCATTTGCCC-1_1
## 0 0 0
## AACACGTAGAACAATC-1_1 AACACGTAGGACGAAA-1_1 AACCATGTCTTGTCAT-1_1
## 0 0 1
## AACCGCGAGAAGAAGC-1_1 AACCGCGCAAAGTCAA-1_1 AACCGCGCAGCTCGCA-1_1
## 0 0 0
## AACCGCGCATCTCGCT-1_1 AACCGCGGTTCTGTTT-1_1 AACCGCGTCCAGATCA-1_1
## 0 3 0
## AACGTTGAGGATATAC-1_1 AACGTTGCACTAAGTC-1_1 AACGTTGCAGCGATCC-1_1
## 0 0 0
## AACGTTGGTCAAAGCG-1_1 AACGTTGGTCCCTACT-1_1 AACGTTGGTTCCCTTG-1_1
## 0 0 0
## AACTCCCAGTGTACGG-1_1 AACTCCCCACCACGTG-1_1 AACTCTTCACGAGGTA-1_1
## 1 2 0
## AACTCTTGTGCAACGA-1_1 AACTCTTGTTCTGTTT-1_1 AACTCTTTCTCGGACG-1_1
## 0 0 0
## AACTGGTAGTGCGTGA-1_1 AACTGGTCAATCCGAT-1_1 AACTGGTGTGTAATGA-1_1
## 0 4 0
## AACTTTCAGATGTAAC-1_1 AACTTTCGTGAAATCA-1_1 AAGACCTAGCGTCTAT-1_1
## 1 3 1
## AAGGAGCCAAAGGTGC-1_1 AAGGCAGCAGCCTATA-1_1 AAGTCTGTCAGTTCGA-1_1
## 2 0 3
## AATCCAGCACCGCTAG-1_1 AATCCAGTCCGAAGAG-1_1 AATCGGTCACCGTTGG-1_1
## 0 0 0
## ACACCAACATGCTAGT-1_1 ACACCAAGTCTCTTAT-1_1
## 0 1
## Levels: 0 1 2 3 4 5
setwd("/Users/gexin/Library/CloudStorage/OneDrive-UniversityofCambridge/CamBioHackathon 2024")
DimPlot(denovo_AML_data_no3,reduction = "umap", pt.size = 0.3, label = TRUE, label.size = 8)ggsave(paste0("UMAP 2 samples with SNN clusters resolution 0.1 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 8, height = 8, dpi = 300)
DimPlot(denovo_AML_data_no3,reduction = "umap", pt.size = 0.3, label = TRUE, label.size = 4, split.by = "orig.ident", ncol = 2) ggsave(paste0("UMAP 2 samples separate with SNN clusters resolution 0.1 "
, format(Sys.time(), "%Y-%m-%d")
, ".pdf"), width = 12, height = 10, dpi = 300)